"""
@Time: 2021/3/10 下午 9:27
@Author: jinzhuan
@File: ontonotesb.py
@Desc: 
"""
import os
from ..loader import Loader
from cognlp import DataTable


class OntoNotesBinaryEtLoader(Loader):
    """Load tab-separated OntoNotes files into ``DataTable`` objects.

    Each non-blank input line is split on tabs and read as:

    * field 0 -- integer stored under ``'start'``
    * field 1 -- integer stored under ``'end'``
    * field 2 -- space-separated tokens stored under ``'words'``
    * field 4 -- space-separated labels; only the first one is kept and
      stored under ``'mention'``

    Field 3 is not used by this loader.
    """

    def __init__(self):
        super().__init__()

    def _load(self, path: str) -> DataTable:
        """Parse one TSV file and return its rows as a ``DataTable``.

        Every label encountered is also added to ``self.label_set``
        (presumably initialised by the ``Loader`` base class -- confirm).

        Args:
            path: Path of the TSV file to read.

        Returns:
            A ``DataTable`` filled with the ``'words'``, ``'mention'``,
            ``'start'`` and ``'end'`` values of every parsed line.

        Raises:
            IndexError: If a non-blank line has fewer than five fields.
            ValueError: If field 0 or field 1 is not an integer.
        """
        dataset = DataTable()
        # Explicit encoding so parsing does not depend on the platform's
        # locale default (assumes the data files are UTF-8 -- confirm).
        with open(path, encoding='utf-8') as f:
            for line in f:
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of failing with IndexError on the field lookups.
                if not line.strip():
                    continue
                words = line.split('\t')
                tokens = words[2].split(' ')
                # Only the first label of the label field is used.
                mention = words[4].strip().split(' ')[0]
                self.label_set.add(mention)
                dataset('words', tokens)
                dataset('mention', mention)
                dataset('start', int(words[0]))
                dataset('end', int(words[1]))
        return dataset

    def load_all(self, path: str):
        """Load the train, dev and test splits found in directory ``path``.

        Args:
            path: Directory containing ``train_clean.tsv-100-clean`` and
                ``test_clean.tsv``.

        Returns:
            A ``(train, dev, test)`` tuple of ``DataTable`` objects.
        """
        train_path = os.path.join(path, 'train_clean.tsv-100-clean')
        # NOTE(review): dev and test both read 'test_clean.tsv'. Kept as in
        # the original; confirm whether a separate dev file was intended.
        dev_path = os.path.join(path, 'test_clean.tsv')
        test_path = os.path.join(path, 'test_clean.tsv')
        return self._load(train_path), self._load(dev_path), self._load(test_path)
